In this notebook we try to predict a Pokemon's primary type ("Type 1") from its other attributes (stats, generation, legendary flag, secondary type).
In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
In [2]:
# Load the raw Pokemon dataset, shuffle the rows, and drop the first two
# columns (row id and name) so only the target and feature columns remain.
dataset = pd.read_csv('Pokemon.csv')
# print() works under both Python 2 and 3 (the original py2 print statement does not).
print('total rows: ' + str(dataset.shape[0]))
print('total columns: ' + str(dataset.shape[1]))
# Shuffle so the later train/test split is not order-biased.
# NOTE(review): no random seed is set, so the shuffle (and therefore the
# split and results) is not reproducible — consider seeding np.random first.
dataset = dataset.iloc[np.random.permutation(len(dataset))]
dataset = dataset.iloc[:, 2:]
dataset.head()
Out[2]:
In [3]:
# Pairwise scatter plots of the numeric columns, colored by primary type.
# The many classes overlap heavily in every 2-D projection, which previews
# how hard this classification problem is.
sns.pairplot(dataset, hue="Type 1")
plt.show()
Hmm — the classes overlap heavily in the pairplot, so this looks like a hard classification problem. Let's try a neural network anyway.
In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Missing "Type 2" entries become the explicit category 'none'.
dataset = dataset.fillna(value='none')

# Features: every column after the target. The first and last feature
# columns are categorical ("Type 2" and the legendary flag) and are
# integer-encoded before standardization.
x_data = dataset.iloc[:, 1:].values
x_data[:, 0] = LabelEncoder().fit_transform(x_data[:, 0])
x_data[:, -1] = LabelEncoder().fit_transform(x_data[:, -1])
# NOTE(review): the scaler is fit on the full dataset before splitting,
# which leaks test-set statistics into training; fitting on x_train only
# would be the rigorous choice (kept as-is to preserve results).
x_data = StandardScaler().fit_transform(x_data)

# Target: the primary type, both as the original strings (for reports)
# and label-encoded / one-hot encoded (for training).
y_datalabel = dataset.iloc[:, 0]
y_data = LabelEncoder().fit_transform(dataset.iloc[:, 0])
onehot = np.zeros((y_data.shape[0], np.unique(y_data).shape[0]))
# Vectorized one-hot assignment (replaces the py2-only xrange loop).
onehot[np.arange(y_data.shape[0]), y_data] = 1.0

# Hold out 20% for testing; the third split keeps the integer labels
# aligned with x_test for the classification report later.
x_train, x_test, y_train, y_test, _, y_test_label = train_test_split(x_data, onehot, y_data, test_size=0.2)
print(np.unique(y_datalabel))
print(np.unique(y_data))
In [14]:
# Three-hidden-layer MLP: input -> 512 -> 512 -> 256 -> n_classes,
# trained with softmax cross-entropy plus L2 weight decay.
size_layer_first = 512
size_layer_second = 256
learning_rate = 0.1
beta = 0.0005  # L2 regularization strength applied to every weight matrix
# TF1-style graph inputs: a batch of feature rows and one-hot labels.
X = tf.placeholder("float", [None, x_train.shape[1]])
Y = tf.placeholder("float", [None, np.unique(y_data).shape[0]])
# Weight matrices for the four affine transforms (unit-stddev init).
layer1 = tf.Variable(tf.random_normal([x_train.shape[1], size_layer_first]))
layer2 = tf.Variable(tf.random_normal([size_layer_first, size_layer_first]))
layer3 = tf.Variable(tf.random_normal([size_layer_first, size_layer_second]))
layer4 = tf.Variable(tf.random_normal([size_layer_second, np.unique(y_data).shape[0]]))
# Biases use a smaller stddev so initial activations are dominated by inputs.
bias1 = tf.Variable(tf.random_normal([size_layer_first], stddev = 0.1))
bias2 = tf.Variable(tf.random_normal([size_layer_first], stddev = 0.1))
bias3 = tf.Variable(tf.random_normal([size_layer_second], stddev = 0.1))
bias4 = tf.Variable(tf.random_normal([np.unique(y_data).shape[0]], stddev = 0.1))
# Forward pass: three ReLU hidden layers, then raw logits (no softmax —
# the loss op below applies softmax internally).
hidden1 = tf.nn.relu(tf.matmul(X, layer1) + bias1)
hidden2 = tf.nn.relu(tf.matmul(hidden1, layer2) + bias2)
hidden3 = tf.nn.relu(tf.matmul(hidden2, layer3) + bias3)
hidden4 = tf.matmul(hidden3, layer4) + bias4
# Mean cross-entropy over the batch, plus L2 penalties on all weights.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = hidden4))
loss += tf.nn.l2_loss(layer1) * beta + tf.nn.l2_loss(layer2) * beta + tf.nn.l2_loss(layer3) * beta + tf.nn.l2_loss(layer4) * beta
optimizer = tf.train.AdagradOptimizer(learning_rate = learning_rate).minimize(loss)
# Fraction of the batch whose argmax prediction matches the one-hot label.
correct_pred = tf.equal(tf.argmax(hidden4, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
In [15]:
# Train for 500 epochs of mini-batch SGD (Adagrad), logging mean loss and
# accuracy every 50 epochs.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

BATCH_SIZE = 30
EPOCH, LOSS, ACC = [], [], []
n_batches = x_train.shape[0] // BATCH_SIZE  # drop the ragged final batch

for i in range(500):  # range (not py2-only xrange) runs on both interpreters
    last = time.time()
    EPOCH.append(i)
    TOTAL_LOSS, ACCURACY = 0, 0
    for n in range(0, n_batches * BATCH_SIZE, BATCH_SIZE):
        batch_x = x_train[n: n + BATCH_SIZE, :]
        batch_y = y_train[n: n + BATCH_SIZE, :]
        # Fetch loss and accuracy in the same sess.run as the train op:
        # one forward pass per batch instead of two, and both metrics now
        # consistently reflect the pre-update weights (the original's
        # second run measured accuracy after the update).
        cost, acc_batch, _ = sess.run([loss, accuracy, optimizer],
                                      feed_dict = {X : batch_x, Y : batch_y})
        ACCURACY += acc_batch
        TOTAL_LOSS += cost
    # Average the per-batch metrics over the epoch.
    TOTAL_LOSS /= n_batches
    ACCURACY /= n_batches
    LOSS.append(TOTAL_LOSS)
    ACC.append(ACCURACY)
    timetaken = (time.time() - last) / float(n_batches)  # seconds per batch
    if (i + 1) % 50 == 0:
        print('epoch: ' + str(i + 1) + ', loss: ' + str(TOTAL_LOSS) +
              ', accuracy: ' + str(ACCURACY) + ', s / batch: ' + str(timetaken))
In [16]:
from sklearn import metrics

# Evaluate on the held-out 20%: overall accuracy plus per-class
# precision/recall/F1 against the original string class names.
testing_acc, logits = sess.run([accuracy, tf.cast(tf.argmax(hidden4, 1), tf.int32)],
                               feed_dict = {X : x_test, Y : y_test})
print('testing accuracy: ' + str(testing_acc))  # print() is py2/py3 compatible
print(metrics.classification_report(y_test_label, logits, target_names = np.unique(y_datalabel)))
In [17]:
# Plot the training curves side by side. matplotlib and seaborn are already
# imported and styled in the first cell, so the duplicate imports that were
# here have been removed. The explicit subplots/axes interface replaces the
# pyplot state machine, and each panel gets a standalone title.
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
ax_loss.plot(EPOCH, LOSS)
ax_loss.set(xlabel='epoch', ylabel='loss', title='Training loss')
ax_acc.plot(EPOCH, ACC)
ax_acc.set(xlabel='epoch', ylabel='accuracy', title='Training accuracy')
fig.tight_layout()
plt.show()
In [ ]: